from pyspark.sql import SparkSession
import os

from cn.it.tag.base.BaseModel import BaseModel

"""
-------------------------------------------------
   Description :	Match-type tag model: joins users' `politicalface` values against tag rules
   SourceFile  :	PoliticalFaceModel
   Author      :	it team
-------------------------------------------------
"""
# Match-type tag model: political face.
# 0. Set the system environment variables.
# These run at import time, before the model class below is defined, and
# point the process at the JDK, the Spark distribution, and the Python
# interpreter used for both PySpark workers and the driver.
os.environ.update({
    'JAVA_HOME': '/export/server/jdk1.8.0_241/',
    'SPARK_HOME': '/export/server/spark-2.4.8-bin-hadoop2.7',
    'PYSPARK_PYTHON': '/root/anaconda3/envs/pyspark_env/bin/python',
    'PYSPARK_DRIVER_PYTHON': '/root/anaconda3/envs/pyspark_env/bin/python',
})
# Political face tag model.
class PoliticalFaceModel(BaseModel):
    """Match-type model that pairs each user with a political-face tag."""

    def compute(self, es_df, five_df):
        """Join user rows to tag rules on the political-face value.

        Args:
            es_df: DataFrame providing at least the ``id`` and
                ``politicalface`` columns (presumably user records loaded
                by the base model -- confirm against ``BaseModel``).
            five_df: DataFrame providing at least the ``id`` and ``rule``
                columns (presumably the tag definitions -- confirm against
                ``BaseModel``).

        Returns:
            A DataFrame with two columns: ``userId`` (``es_df.id``) and
            ``tagsId`` (``five_df.id``). Because the join is a left join,
            every ``es_df`` row is kept; ``tagsId`` is null for rows whose
            ``politicalface`` matches no ``rule``.
        """
        # Debug output: print schema and sample rows of each input, in order.
        for frame in (es_df, five_df):
            frame.printSchema()
            frame.show()

        # A user matches a tag when its political-face value equals the rule.
        rule_matches = es_df['politicalface'] == five_df['rule']
        # Both inputs have an ``id`` column, so each is taken from its own
        # DataFrame and renamed to keep the result unambiguous.
        user_id = es_df['id'].alias("userId")
        tag_id = five_df['id'].alias("tagsId")

        tagged_df = es_df.join(five_df, on=rule_matches, how='left')
        tagged_df = tagged_df.select(user_id, tag_id)

        # Debug output for the result as well.
        tagged_df.printSchema()
        tagged_df.show()
        return tagged_df



if __name__ == '__main__':
    # Script entry point: build the model and run the pipeline that
    # BaseModel.execute() drives.
    # NOTE(review): 62 is presumably the id of the political-face tag that
    # BaseModel uses to look up its rules -- confirm against BaseModel.
    political_face_model = PoliticalFaceModel(62)
    political_face_model.execute()